{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "BOwsuGQQY9OL"
   },
   "outputs": [],
   "source": [
    "from tensorflow.keras.preprocessing.sequence import pad_sequences\n",
    "from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, Bidirectional\n",
    "from tensorflow.keras.preprocessing.text import Tokenizer\n",
    "from tensorflow.keras.models import Sequential\n",
    "from tensorflow.keras.optimizers import Adam\n",
    "### YOUR CODE HERE\n",
    "# Figure out how to import regularizers\n",
    "###\n",
    "import tensorflow.keras.utils as ku \n",
    "import numpy as np "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "PRnDnCW-Z7qv"
   },
   "outputs": [],
   "source": [
    "tokenizer = Tokenizer()\n",
    "!wget --no-check-certificate \\\n",
    "    https://storage.googleapis.com/laurencemoroney-blog.appspot.com/sonnets.txt \\\n",
    "    -O /tmp/sonnets.txt\n",
    "data = open('/tmp/sonnets.txt').read()\n",
    "\n",
    "corpus = data.lower().split(\"\\n\")\n",
    "\n",
    "\n",
    "tokenizer.fit_on_texts(corpus)\n",
    "total_words = len(tokenizer.word_index) + 1\n",
    "\n",
    "# create input sequences using list of tokens\n",
    "input_sequences = []\n",
    "for line in corpus:\n",
    "\ttoken_list = tokenizer.texts_to_sequences([line])[0]\n",
    "\tfor i in range(1, len(token_list)):\n",
    "\t\tn_gram_sequence = token_list[:i+1]\n",
    "\t\tinput_sequences.append(n_gram_sequence)\n",
    "\n",
    "\n",
    "# pad sequences \n",
    "max_sequence_len = max([len(x) for x in input_sequences])\n",
    "input_sequences = np.array(pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre'))\n",
    "\n",
    "# create predictors and label\n",
    "predictors, label = input_sequences[:,:-1],input_sequences[:,-1]\n",
    "\n",
    "label = ku.to_categorical(label, num_classes=total_words)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "w9vH8Y59ajYL"
   },
   "outputs": [],
   "source": [
    "model = Sequential()\n",
    "model.add(# Your Embedding Layer)\n",
    "model.add(# An LSTM Layer)\n",
    "model.add(# A dropout layer)\n",
    "model.add(# Another LSTM Layer)\n",
    "model.add(# A Dense Layer including regularizers)\n",
    "model.add(# A Dense Layer)\n",
    "# Pick an optimizer\n",
    "model.compile(# Pick a loss function and an optimizer)\n",
    "print(model.summary())\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "AIg2f1HBxqof"
   },
   "outputs": [],
   "source": [
    " history = model.fit(predictors, label, epochs=100, verbose=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "1fXTEO3GJ282"
   },
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "acc = history.history['acc']\n",
    "loss = history.history['loss']\n",
    "\n",
    "epochs = range(len(acc))\n",
    "\n",
    "plt.plot(epochs, acc, 'b', label='Training accuracy')\n",
    "plt.title('Training accuracy')\n",
    "\n",
    "plt.figure()\n",
    "\n",
    "plt.plot(epochs, loss, 'b', label='Training Loss')\n",
    "plt.title('Training loss')\n",
    "plt.legend()\n",
    "\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "6Vc6PHgxa6Hm"
   },
   "outputs": [],
   "source": [
    "seed_text = \"Help me Obi Wan Kenobi, you're my only hope\"\n",
    "next_words = 100\n",
    "  \n",
    "for _ in range(next_words):\n",
    "\ttoken_list = tokenizer.texts_to_sequences([seed_text])[0]\n",
    "\ttoken_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')\n",
    "\tpredicted = model.predict_classes(token_list, verbose=0)\n",
    "\toutput_word = \"\"\n",
    "\tfor word, index in tokenizer.word_index.items():\n",
    "\t\tif index == predicted:\n",
    "\t\t\toutput_word = word\n",
    "\t\t\tbreak\n",
    "\tseed_text += \" \" + output_word\n",
    "print(seed_text)"
   ]
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "colab": {
   "name": "NLP-Week4-Exercise-Shakespeare-Question.ipynb",
   "provenance": [
    {
     "file_id": "1P_MeOoZuEGgMtvn3yVAQFWDhnuyBzrv1",
     "timestamp": 1559583333688
    },
    {
     "file_id": "1qmXwwCaT07-qviCiBV65lkus_KvwsyQI",
     "timestamp": 1559583256599
    },
    {
     "file_id": "1V60jn23JMcMpwhF-KvCTYIIfm4J2X6v5",
     "timestamp": 1558728367158
    }
   ],
   "version": "0.3.2"
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
